// equivalent to income-process_SOEP
// but using PSID data
// this version 4/22/19

set scheme s1mono
graph set window fontface "Garamond"

global size size(large)
global ls labsize(medlarge)
global legs medlarge

*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
* SET GLOBAL $PATHS
*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
global root = "INSERT CUSTOMIZED PATH"
global data	 = "$root/data"
global tables	= "$root/figsandtabs"
global figures	= "$root/figsandtabs"

*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


use "$data/temp/PSID1984_2015_short.dta", clear
keep if year>=1984

ren x11101LL pid

tsset pid year


//note here that I am re-labeling just for the purpose of using the same
//code I used for Germany

ren college Abi
ren highschool Realschule
ren dropout Fachhochschule

global min = 25
global pol = "age age_2"
global poln = "2nd_order"
global maxage = 85

gen lwage = ln(o_eqpostII)

ren Abi Abi_s
bysort pid: egen Abi = max(Abi_s)
bysort pid: egen Real = max(Realschule)
bysort pid: egen Fan = max(Fachhochschule)

gen maxEd = 10 if Real==1 & Abi==0 & Fan==0
replace maxEd = 13 if Abi==1 

global break_abi = 0
global break_rea = 0
gen age_2 = age*age
gen age_b1 = (age-$break_abi)*(age>=$break_abi) if maxEd==13
replace age_b1 = (age-$break_rea)*(age>=$break_rea) if maxEd==10

gen age_b1_sq = (age-$break_abi)^2*(age>=$break_abi) if maxEd==13
replace age_b1_sq = (age-$break_rea)^2*(age>=$break_rea)^2 if maxEd==10

global break_abi_2 = 0
global break_rea_2 = 0

gen age_b2 = (age-$break_abi_2)*(age>=$break_abi_2) if maxEd==13
replace age_b2 = (age-$break_rea_2)*(age>=$break_rea_2) if maxEd==10

gen age_b2_sq = (age-$break_abi_2)^2*(age>=$break_abi_2)^2 if maxEd==13
replace age_b2_sq = (age-$break_rea_2)^2*(age>=$break_rea_2)^2 if maxEd==10

areg lwage age age_2  age_b1 age_b1_sq age_b2 age_b2_sq if maxEd==10 & age>=25, abs(pid) 
predict pred_rea if e(sample), xb
gen resid_rea = lwage-pred_rea
qui summ resid_rea
global s_rea = r(sd)

areg lwage age age_2  age_b1 age_b1_sq age_b2 age_b2_sq if maxEd==13 & age>=25, abs(pid) 
predict pred_abi if e(sample), xb
gen resid_abi = lwage-pred_abi
qui summ resid_abi
global s_abi = r(sd)

areg lwage age age_2  age_b1 age_b1_sq age_b2 age_b2_sq if age>=25, abs(pid) 
predict pred_phi if e(sample), xb

areg lwage age age_2  age_b1 age_b1_sq age_b2 age_b2_sq if age>=25, abs(pid) 
predict pred_avg if e(sample), xb

reg lwage i.age if maxEd==13 & age>=25
global overall_c_abi =_b[_cons]
reg lwage i.age i.year if maxEd==13 & age>=25

global c_1984_abi = _b[_cons]
global c_2007_abi = _b[2007.year]
global c_2009_abi = _b[2009.year]
global c_2011_abi = _b[2011.year]

global fix_abi = $c_1984_abi+1/3*($c_2007_abi+$c_2009_abi+$c_2011_abi)-$overall_c_abi

reg lwage i.age if maxEd==10 & age>=25
global overall_c_rea =_b[_cons]
reg lwage i.age i.year if maxEd==10 & age>=25
global c_1984_rea = _b[_cons]
global c_2007_rea = _b[2007.year]
global c_2009_rea = _b[2009.year]
global c_2011_rea = _b[2011.year]

global fix_rea = $c_1984_rea+1/3*($c_2007_rea+$c_2009_rea+$c_2011_rea)-$overall_c_rea

*******************************
** plot age residuals
*******************************

areg lwage i.age if maxEd==10 & age>=25, abs(pid) 
matrix A_rea = e(b)
matrix A_rea = A_rea'

areg lwage i.age if maxEd==13 & age>=25, abs(pid) 
matrix A_abi = e(b)
matrix A_abi = A_abi'

areg lwage i.age if age>=25, abs(pid) 
matrix A_avg = e(b)
matrix A_avg = A_avg'

keep if age>=25
collapse (mean) pred_rea pred_abi pred_avg, by(age)
svmat A_rea
summ A_rea
replace A_rea = A_rea + r(max)
summ age if A_rea!=.
replace A_rea = . if age==r(max)

svmat A_abi
summ A_abi
replace A_abi = A_abi + r(max)
summ age if A_abi!=.
replace A_abi = . if age==r(max)

svmat A_avg
summ A_avg
replace A_avg = A_avg + r(max)
summ age if A_avg!=.
replace A_avg = . if age==r(max)

line  pred_abi pred_rea A_abi A_rea age if age>=25 & age<=94, xlab(, $ls) ylab(, $ls) xtitle("Age", $size) ytitle("log(income)", $size)  lpattern(solid solid dash dash) lcolor(black gs6 black gs6) legend(ring(0) position(11) size($legs) label(1 "College, fitted") label(2 "High School, fitted") label(3 "College, age FE") label(4 "High School, age FE"))
graph export "$dest/income_fit_psid.pdf", as(pdf) replace
replace pred_rea = pred_rea + ($s_rea)^2/2 + $fix_rea
replace pred_abi = pred_abi + ($s_abi)^2/2 + $fix_abi

outsheet age pred_rea pred_abi using "$data/processed/IncomeProcess/IncomeProcess/trends_psid.csv", comma replace

